{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "9e3a3483-93f1-4af6-85ae-c3252912bac2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "3d287bd4-81ad-4ad6-af6e-0b5bfd7ebfa2",
   "metadata": {},
   "outputs": [],
   "source": [
    "train=pd.read_csv('train.csv')\n",
    "test=pd.read_csv('test.csv')\n",
    "datas=pd.concat([train,test], ignore_index = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "9e2f1df3-a62e-40c8-9a9f-92c62775a5d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1       0.0       3   \n",
       "1            2       1.0       1   \n",
       "2            3       1.0       3   \n",
       "3            4       1.0       1   \n",
       "4            5       0.0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3      0            113803  53.1000  C123        S  \n",
       "4      0            373450   8.0500   NaN        S  "
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "33d5b85e-e155-4ce7-a915-aeecbf2d3442",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1309 entries, 0 to 1308\n",
      "Data columns (total 12 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  1309 non-null   int64  \n",
      " 1   Survived     891 non-null    float64\n",
      " 2   Pclass       1309 non-null   int64  \n",
      " 3   Name         1309 non-null   object \n",
      " 4   Sex          1309 non-null   object \n",
      " 5   Age          1046 non-null   float64\n",
      " 6   SibSp        1309 non-null   int64  \n",
      " 7   Parch        1309 non-null   int64  \n",
      " 8   Ticket       1309 non-null   object \n",
      " 9   Fare         1308 non-null   float64\n",
      " 10  Cabin        295 non-null    object \n",
      " 11  Embarked     1307 non-null   object \n",
      "dtypes: float64(3), int64(4), object(5)\n",
      "memory usage: 122.8+ KB\n"
     ]
    }
   ],
   "source": [
    "# 看一下有没有缺失的信息,有的话补上  age714 有缺失  Cabin204有缺失    Embarked889有缺失\n",
    " # Fare   少了\n",
    "datas.info() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "109c1a4b-1338-4b8f-a7ac-92eb71c1e1cd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Survived\n",
       "0    549\n",
       "1    342\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['Survived'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "53045aa8-31b2-40da-bc52-13be3eddca90",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 四项少数据补上\n",
    "datas['Cabin'] = datas['Cabin'].fillna('U')\n",
    "datas['Cabin']= datas['Cabin'].str.get(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "a2984a24-2b24-4d0c-aa06-93b2cbd470a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Moran, Mr. James</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>330877</td>\n",
       "      <td>8.4583</td>\n",
       "      <td>U</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>Williams, Mr. Charles Eugene</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>244373</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Masselmani, Mrs. Fatima</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2649</td>\n",
       "      <td>7.2250</td>\n",
       "      <td>U</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Emir, Mr. Farred Chehab</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2631</td>\n",
       "      <td>7.2250</td>\n",
       "      <td>U</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>O'Dwyer, Miss. Ellen \"Nellie\"</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>330959</td>\n",
       "      <td>7.8792</td>\n",
       "      <td>U</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1299</th>\n",
       "      <td>1300</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>Riordan, Miss. Johanna Hannah\"\"</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>334915</td>\n",
       "      <td>7.7208</td>\n",
       "      <td>U</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1301</th>\n",
       "      <td>1302</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>Naughton, Miss. Hannah</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>365237</td>\n",
       "      <td>7.7500</td>\n",
       "      <td>U</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1304</th>\n",
       "      <td>1305</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>Spector, Mr. Woolf</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>A.5. 3236</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1307</th>\n",
       "      <td>1308</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>Ware, Mr. Frederick</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>359309</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1308</th>\n",
       "      <td>1309</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>Peter, Master. Michael J</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2668</td>\n",
       "      <td>22.3583</td>\n",
       "      <td>U</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>263 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      PassengerId  Survived  Pclass                             Name     Sex  \\\n",
       "5               6       0.0       3                 Moran, Mr. James    male   \n",
       "17             18       1.0       2     Williams, Mr. Charles Eugene    male   \n",
       "19             20       1.0       3          Masselmani, Mrs. Fatima  female   \n",
       "26             27       0.0       3          Emir, Mr. Farred Chehab    male   \n",
       "28             29       1.0       3    O'Dwyer, Miss. Ellen \"Nellie\"  female   \n",
       "...           ...       ...     ...                              ...     ...   \n",
       "1299         1300       NaN       3  Riordan, Miss. Johanna Hannah\"\"  female   \n",
       "1301         1302       NaN       3           Naughton, Miss. Hannah  female   \n",
       "1304         1305       NaN       3               Spector, Mr. Woolf    male   \n",
       "1307         1308       NaN       3              Ware, Mr. Frederick    male   \n",
       "1308         1309       NaN       3         Peter, Master. Michael J    male   \n",
       "\n",
       "      Age  SibSp  Parch     Ticket     Fare Cabin Embarked  \n",
       "5     NaN      0      0     330877   8.4583     U        Q  \n",
       "17    NaN      0      0     244373  13.0000     U        S  \n",
       "19    NaN      0      0       2649   7.2250     U        C  \n",
       "26    NaN      0      0       2631   7.2250     U        C  \n",
       "28    NaN      0      0     330959   7.8792     U        Q  \n",
       "...   ...    ...    ...        ...      ...   ...      ...  \n",
       "1299  NaN      0      0     334915   7.7208     U        Q  \n",
       "1301  NaN      0      0     365237   7.7500     U        Q  \n",
       "1304  NaN      0      0  A.5. 3236   8.0500     U        S  \n",
       "1307  NaN      0      0     359309   8.0500     U        S  \n",
       "1308  NaN      1      1       2668  22.3583     U        C  \n",
       "\n",
       "[263 rows x 12 columns]"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# age714 有缺失     Embarked889有缺失 Fare   少了  其中年龄缺了263多行\n",
    "datas[datas['Age'].isnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "id": "02c16862-f41b-4d57-b686-22e5186c20cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "ages = datas[['Age', 'Pclass','Sex']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "aceb36f0-cb7a-4d04-af25-c05e707dc1d2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Sex</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>3</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>3</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>3</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age  Pclass     Sex\n",
       "0  22.0       3    male\n",
       "1  38.0       1  female\n",
       "2  26.0       3  female\n",
       "3  35.0       1  female\n",
       "4  35.0       3    male"
      ]
     },
     "execution_count": 168,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ages.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "id": "a207b2da-bb57-47eb-84d9-e9bf73698f9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "ages=pd.get_dummies(ages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "id": "10f5c8c6-33f2-4678-bded-b89f674d36e1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Sex_female</th>\n",
       "      <th>Sex_male</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22.0</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>26.0</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age  Pclass  Sex_female  Sex_male\n",
       "0  22.0       3       False      True\n",
       "1  38.0       1        True     False\n",
       "2  26.0       3        True     False\n",
       "3  35.0       1        True     False\n",
       "4  35.0       3       False      True"
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ages.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "id": "3498610e-451f-4b6b-a9b5-aaa7e0438e39",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1046, 4)\n"
     ]
    }
   ],
   "source": [
    "known_ages = ages[ages.Age.notnull()].values\n",
    "print(known_ages.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "id": "6b994a2b-849a-4f24-a7b1-57234300a9e5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(263, 4)\n"
     ]
    }
   ],
   "source": [
    "unknown_ages = ages[ages.Age.isnull()].values\n",
    "print(unknown_ages.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "id": "5bf0f299-c9b8-4bf2-8cfb-73cc39778293",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[22.0 38.0 26.0 ... 28.0 39.0 38.5]\n"
     ]
    }
   ],
   "source": [
    "y = known_ages[:, 0]\n",
    "print(y)\n",
    "# 年龄的那一列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "id": "874da0d0-83bd-4331-901b-bb94f4eb85ea",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[3 False True]\n",
      " [1 True False]\n",
      " [3 True False]\n",
      " ...\n",
      " [3 True False]\n",
      " [1 True False]\n",
      " [3 False True]]\n"
     ]
    }
   ],
   "source": [
    "X = known_ages[:, 1:]\n",
    "print(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "id": "f18ef368-1593-4b7a-96a6-017384e84b1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "rfr = RandomForestRegressor(random_state=60, n_estimators=100, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "id": "bc46dd3c-624a-4342-99a9-a6a6d7558092",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestRegressor(n_jobs=-1, random_state=60)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" checked><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestRegressor</label><div class=\"sk-toggleable__content\"><pre>RandomForestRegressor(n_jobs=-1, random_state=60)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "RandomForestRegressor(n_jobs=-1, random_state=60)"
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rfr.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "6813441a-2b7b-4e89-9ac5-9fecc12a1cc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(unknown_ages[:,1:])\n",
    "# 根据未知的后三列,预测年龄\n",
    "pre_ages = rfr.predict(unknown_ages[:, 1::])\n",
    "# print(pre_ages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "28b56e34-9f73-4735-8a46-8f5e0acd4e44",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 用预测的年龄,填补空白纸\n",
    "datas.loc[ (datas.Age.isnull()), 'Age' ] = pre_ages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "id": "5b493295-9fd6-4372-838b-32132814cdd8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1309 entries, 0 to 1308\n",
      "Data columns (total 12 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  1309 non-null   int64  \n",
      " 1   Survived     891 non-null    float64\n",
      " 2   Pclass       1309 non-null   int64  \n",
      " 3   Name         1309 non-null   object \n",
      " 4   Sex          1309 non-null   object \n",
      " 5   Age          1309 non-null   float64\n",
      " 6   SibSp        1309 non-null   int64  \n",
      " 7   Parch        1309 non-null   int64  \n",
      " 8   Ticket       1309 non-null   object \n",
      " 9   Fare         1308 non-null   float64\n",
      " 10  Cabin        1309 non-null   object \n",
      " 11  Embarked     1307 non-null   object \n",
      "dtypes: float64(3), int64(4), object(5)\n",
      "memory usage: 122.8+ KB\n"
     ]
    }
   ],
   "source": [
    "datas.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "id": "3ba59a95-b73a-4b85-b460-e18168351f8c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>62</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Icard, Miss. Amelie</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>113572</td>\n",
       "      <td>80.0</td>\n",
       "      <td>B</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>829</th>\n",
       "      <td>830</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Stone, Mrs. George Nelson (Martha Evelyn)</td>\n",
       "      <td>female</td>\n",
       "      <td>62.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>113572</td>\n",
       "      <td>80.0</td>\n",
       "      <td>B</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     PassengerId  Survived  Pclass                                       Name  \\\n",
       "61            62       1.0       1                        Icard, Miss. Amelie   \n",
       "829          830       1.0       1  Stone, Mrs. George Nelson (Martha Evelyn)   \n",
       "\n",
       "        Sex   Age  SibSp  Parch  Ticket  Fare Cabin Embarked  \n",
       "61   female  38.0      0      0  113572  80.0     B      NaN  \n",
       "829  female  62.0      0      0  113572  80.0     B      NaN  "
      ]
     },
     "execution_count": 180,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas[datas.Embarked.isnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "id": "df7b05ac-58a6-4159-96c4-d28c26a57d8c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1       0.0       3   \n",
       "1            2       1.0       1   \n",
       "2            3       1.0       3   \n",
       "3            4       1.0       1   \n",
       "4            5       0.0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500     U        S  \n",
       "1      0          PC 17599  71.2833     C        C  \n",
       "2      0  STON/O2. 3101282   7.9250     U        S  \n",
       "3      0            113803  53.1000     C        S  \n",
       "4      0            373450   8.0500     U        S  "
      ]
     },
     "execution_count": 181,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "id": "0169b6c4-bbb8-43a2-b5a1-f420366b2416",
   "metadata": {},
   "outputs": [],
   "source": [
    "datas.loc[ (datas.Embarked.isnull()), 'Embarked'] = 'C'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "id": "8573e9ad-4acb-497b-b9a1-fa4eae78469b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1309 entries, 0 to 1308\n",
      "Data columns (total 12 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  1309 non-null   int64  \n",
      " 1   Survived     891 non-null    float64\n",
      " 2   Pclass       1309 non-null   int64  \n",
      " 3   Name         1309 non-null   object \n",
      " 4   Sex          1309 non-null   object \n",
      " 5   Age          1309 non-null   float64\n",
      " 6   SibSp        1309 non-null   int64  \n",
      " 7   Parch        1309 non-null   int64  \n",
      " 8   Ticket       1309 non-null   object \n",
      " 9   Fare         1308 non-null   float64\n",
      " 10  Cabin        1309 non-null   object \n",
      " 11  Embarked     1309 non-null   object \n",
      "dtypes: float64(3), int64(4), object(5)\n",
      "memory usage: 122.8+ KB\n"
     ]
    }
   ],
   "source": [
    "datas.info()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "id": "b1ed91d1-57c6-4eb7-be2e-ff014361ced2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1043</th>\n",
       "      <td>1044</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>Storey, Mr. Thomas</td>\n",
       "      <td>male</td>\n",
       "      <td>60.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3701</td>\n",
       "      <td>NaN</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      PassengerId  Survived  Pclass                Name   Sex   Age  SibSp  \\\n",
       "1043         1044       NaN       3  Storey, Mr. Thomas  male  60.5      0   \n",
       "\n",
       "      Parch Ticket  Fare Cabin Embarked  \n",
       "1043      0   3701   NaN     U        S  "
      ]
     },
     "execution_count": 184,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas[datas.Fare.isnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "id": "20e2e24b-3e26-4aba-9e2d-cb1258e1773c",
   "metadata": {},
   "outputs": [],
   "source": [
    "datas.loc[ (datas.Fare.isnull()), 'Fare'] = 8.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "id": "a3da13cc-c78b-4095-886b-a367d50dc96c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1       0.0       3   \n",
       "1            2       1.0       1   \n",
       "2            3       1.0       3   \n",
       "3            4       1.0       1   \n",
       "4            5       0.0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500     U        S  \n",
       "1      0          PC 17599  71.2833     C        C  \n",
       "2      0  STON/O2. 3101282   7.9250     U        S  \n",
       "3      0            113803  53.1000     C        S  \n",
       "4      0            373450   8.0500     U        S  "
      ]
     },
     "execution_count": 186,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "id": "1094bd0f-52f1-4ae9-a69d-92b20c6ae7ef",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "\"['Sex', 'Embarked'] not in index\"",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[191], line 7\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;66;03m# datas=datas.drop('Name', axis=1)\u001b[39;00m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;66;03m# datas=datas.drop('Ticket',axis=1)\u001b[39;00m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;66;03m# datas=datas.drop('SibSp',axis=1)\u001b[39;00m\n\u001b[0;32m      4\u001b[0m \u001b[38;5;66;03m# datas=datas.drop('Parch',axis=1)\u001b[39;00m\n\u001b[0;32m      5\u001b[0m \u001b[38;5;66;03m# datas=datas.drop('PassengerId',axis=1)\u001b[39;00m\n\u001b[0;32m      6\u001b[0m \u001b[38;5;66;03m# datas['Cabin']= datas['Cabin'].str.get(0)\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m datas\u001b[38;5;241m=\u001b[39mdatas[[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSurvived\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPclass\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSex\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAge\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFare\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEmbarked\u001b[39m\u001b[38;5;124m'\u001b[39m]]\n\u001b[0;32m      8\u001b[0m datas\u001b[38;5;241m=\u001b[39mpd\u001b[38;5;241m.\u001b[39mget_dummies(datas)\n",
      "File \u001b[1;32mD:\\Anacnoda\\enverment\\Lib\\site-packages\\pandas\\core\\frame.py:3899\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   3897\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[0;32m   3898\u001b[0m         key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[1;32m-> 3899\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39m_get_indexer_strict(key, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m   3901\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[0;32m   3902\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n",
      "File \u001b[1;32mD:\\Anacnoda\\enverment\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:6115\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[1;34m(self, key, axis_name)\u001b[0m\n\u001b[0;32m   6112\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   6113\u001b[0m     keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[1;32m-> 6115\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_if_missing(keyarr, indexer, axis_name)\n\u001b[0;32m   6117\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[0;32m   6118\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[0;32m   6119\u001b[0m     \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
      "File \u001b[1;32mD:\\Anacnoda\\enverment\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:6179\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[1;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[0;32m   6176\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m   6178\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[1;32m-> 6179\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "\u001b[1;31mKeyError\u001b[0m: \"['Sex', 'Embarked'] not in index\""
     ]
    }
   ],
   "source": [
    "# datas=datas.drop('Name', axis=1)\n",
    "# datas=datas.drop('Ticket',axis=1)\n",
    "# datas=datas.drop('SibSp',axis=1)\n",
    "# datas=datas.drop('Parch',axis=1)\n",
    "# datas=datas.drop('PassengerId',axis=1)\n",
    "# datas['Cabin']= datas['Cabin'].str.get(0)\n",
    "datas=datas[['Survived','Pclass','Sex','Age','Fare','Embarked']]\n",
    "datas=pd.get_dummies(datas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "id": "c3864d6c-f28d-4901-b587-7aba7f1bbfec",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Age</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Sex_female</th>\n",
       "      <th>Sex_male</th>\n",
       "      <th>Embarked_C</th>\n",
       "      <th>Embarked_Q</th>\n",
       "      <th>Embarked_S</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>22.0</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>38.0</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>26.0</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>35.0</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>35.0</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Survived  Pclass   Age     Fare  Sex_female  Sex_male  Embarked_C  \\\n",
       "0       0.0       3  22.0   7.2500       False      True       False   \n",
       "1       1.0       1  38.0  71.2833        True     False        True   \n",
       "2       1.0       3  26.0   7.9250        True     False       False   \n",
       "3       1.0       1  35.0  53.1000        True     False       False   \n",
       "4       0.0       3  35.0   8.0500       False      True       False   \n",
       "\n",
       "   Embarked_Q  Embarked_S  \n",
       "0       False        True  \n",
       "1       False       False  \n",
       "2       False        True  \n",
       "3       False        True  \n",
       "4       False        True  "
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "8e7746ea-054b-47ff-8842-5ccbd78335db",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "train=datas[datas['Survived'].notnull()]\n",
    "test=datas[datas['Survived'].isnull()].drop('Survived',axis=1)\n",
    "X = train.values[:,1:]\n",
    "y = train.values[:,0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "id": "39afcc4a-eabc-43ec-a12d-f3f713c66f4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression as LR\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.feature_selection import SelectFromModel\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "lr = LR(penalty=\"l2\",solver=\"liblinear\",C=0.9850000000000004,max_iter=200).fit(X,y.astype('int'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "30931d6c-1856-4c31-838f-0f4312375ecd",
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = lr.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "a3c290e6-e907-489e-b43a-c47eb9d5a11a",
   "metadata": {},
   "outputs": [],
   "source": [
    "testcsv=pd.read_csv('test.csv')\n",
    "PassengerId=testcsv['PassengerId']\n",
    "prdict_test = pd.DataFrame({\"PassengerId\": PassengerId, \"Survived\": predictions.astype(np.int32)})\n",
    "prdict_test.to_csv(\"prdict_test11.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f09f038b-2300-4b32-b7a8-d047d1842a2e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
